1 Raw data

data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")

# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned  
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)

print(data)

2 Continuous Phenotype

# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")

Summary statistics My Image

Histogram My Image

Density My Image

Box_Plot My Image

qq_Plot My Image

Shapiro-Wilk normality test My Image

2.1 Categorical Phenotype

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

data$Categorical <- ifelse(data$T4Cortisol >= 956, "H", 
                           ifelse(data$T4Cortisol <= 190.8, "L", "M"))

table(data$Categorical)
library(ggplot2)

# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))

# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
  geom_bar() +
  labs(title = "Histogram of T4Cortisol by Category",
       x = "Category",
       y = "Frequency") +
  theme_minimal()

# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
  labs(title = "Histogram of T4Cortisol with Color by Category",
       x = "T4 Cortisol",
       y = "Frequency",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_density(alpha = 0.3) +
  labs(title = "Density Plot of T4Cortisol with Color by Category",
       x = "T4Cortisol",
       y = "Density",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
  geom_density() +
  geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
  labs(title = "Density Plot of T4Cortisol with Vertical Lines",
       x = "T4Cortisol",
       y = "Density") +
  theme_minimal()

The animals were sorted in these three categories >H = Hight >M = Medium >L = Low

My Image

The individuals frequency distribution in theese categories are shown in the plots below

My Image My Image My Image My Image

2.2 Removing “outliers”

Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.

library(tidyverse)

data_no_out <- filter(data, T4Cortisol < 1250)

# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")

boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")

hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")

shapiro.test(data$T4Cortisol)

My Image My Image My Image My Image

3 GENOTYPES

Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno

My Image

In this folder we found the following files:

I made a copy of this files in a folder called Raw_files:

/home/bambrozi/2_CORTISOL/RawFiles

This directory has two sub-directories:

4 GWAS

4.1 Checking with Phenotyped Animals also have Genotype

library(data.table)

pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]

#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)

#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,] 

#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)

4.2 Generating a Phenotype file

The phenotype file should have three columns: FID, Animal_id, Phenotype

HO <- rep("HO", 252)

pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))

colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")

pheno_gwas$cdn_id <-  as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)

write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)

4.3 Adjusting the SNP_map to .map

map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)

4.4 Generating the bfiles

system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
    genoplink.bed
  • genoplink.bim
  • genoplink.fam
  • genoplink.log
  • genoplink.nosex

5 Quality Control

We ran the code below to perfom the QC

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean

/home/local/bin/plink \
    --bfile ${DATA} \
    --cow \
    --allow-no-sex \
    --hwe 1e-5 \
    --maf 0.01 \
    --geno 0.1 \
    --mind 0.1 \
    --keep-allele-order \
    --make-bed \
    --out ${RESULT}
    

The server screen outcome is shown below. My Image

After the Quality Control we ended up with

6 KING

To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink.

#!/bin/bash

DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/ONLY_GRASSHILL_AND_PHENO_after_QC/only_grasshill_and_pheno_clean
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/KING/result_king

plink2 \
    --bfile ${DATA} \
    --chr-set 29 \
    --make-king-table \
    --out ${RESULT}

This is the output screen on terminal:

My Image

The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.

My Image

Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga:

setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")

relatedness="result_king.kin0" ## change accordingly!!

library(data.table)

print(relatedness)
rel=fread(relatedness, h = T)
head(rel)

print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354  & IID1!=IID2)
print(dup)
nrow(dup)

So the code above will provide this output if there is any duplicated individual.

My Image

We do not have any duplicated individual

So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC

files:genoplink_clean

After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file

7 PCA

Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.

#!/bin/bash

DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/imput_pca
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/pca_result

plink \
    --bfile ${DATA} \
    --keep-allele-order \
    --chr-set 29 \
    --pca \
    --out ${RESULT}

The PCA output:

My Image

7.1 PCA Plot

After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot

setwd("/home/bambrozi/2_CORTISOL/PCA")

library(ggplot2) 
library(tidyverse)

##
# Visualize PCA results
###

# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)

## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)


# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()


# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
  geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) +  # Add labels for animal IDs
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()

My Image

8 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

This is te output from the code above:

My Image

9 5%

9.1 Manhattan Plot - Bonferroni

gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/nrow(gwas))

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni correction"), bty="n")

The code above creates a Manhattam Plot, the correction for multiple test is the Bonferroni correction

My Image

The code below will save information of Significant SNP.

library(tidyverse)

SNP_sig_bonf <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

Below we can see the 1 significant SNP for Bonferroni correction.

My Image

9.2 FDR

gwas$bh <- p.adjust(gwas$p, method = "BH") #FDR 

The code below will create a file with the significant snp for FDR-BH

SNP_sig_BH <- filter(gwas, bh < 0.05)
write.table(SNP_sig_BH, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_BH.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

As outcome we can see that the FDR-BH method didn’t increase the number of significant SNP.

My Image

9.3 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)

gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf<- -log10(0.05/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")


library(tidyverse)

SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

My Image

Below we can find the list of significant SNPs after the correction for Multiple Testing

My Image

10 qqPlot

I performed the qqPlot analysis using the code below:

gwas<- read.table("GWAS_result.loco.mlma", h=T)
ps<- gwas$p
inflation <- function(ps) {
  chisq <- qchisq(1 - ps, 1)
  lambda <- median(chisq) / qchisq(0.5, 1)
  lambda
}
# Calculating the lambda -  the lambda statistic should be close to 1 if
#the points fall within the expected range, or greater than one if 
# the observed p-values are more significant than expected.
inflation(ps)

bonf<- -log10(0.05/nrow(gwas))

gwas$log<- -log10(gwas$p)

gg_qqplot <- function(ps, ci = 0.95) {
  n  <- length(ps)
  df <- data.frame(
    observed = -log10(sort(ps)),
    expected = -log10(ppoints(n)),
    clower   = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)),
    cupper   = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1))
  )
  log10Pe <- expression(paste("Expected -log"[10], plain(P)))
  log10Po <- expression(paste("Observed -log"[10], plain(P)))
  ggplot(df) +
    geom_ribbon(
      mapping = aes(x = expected, ymin = clower, ymax = cupper),
      alpha = 0.1
    ) +
    geom_point(aes(expected, observed), shape = 1, size = 3) +
    geom_abline(intercept = 0, slope = 1, alpha = 0.5) +
    # geom_line(aes(expected, cupper), linetype = 2, size = 0.5) +
    # geom_line(aes(expected, clower), linetype = 2, size = 0.5) +
    xlab(log10Pe) +
    ylab(log10Po)
}

## plot -----
gg_qqplot(ps) +
  theme_bw(base_size = 24) +
  annotate(
    geom = "text",
    x = -Inf,
    y = Inf,
    hjust = -0.15,
    vjust = 1 + 0.15 * 3,
    label = sprintf("λ = %.2f", inflation(ps)),
    size = 8
  ) +
  theme(
    axis.ticks = element_line(size = 0.5),
    panel.grid = element_blank()
    # panel.grid = element_line(size = 0.5, color = "grey80")
  )

The outcome is the qqplot below, and the lambda value is \(\lambda\) = 1.03

My Image

11 VEP - Variant Effect Prediction

After insert all rsID on VEP the summary is shown below:

My Image

Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:

rs42217767 ps. this variant wasn’t recovered automaticaly by VEP, so I typed the chr and position on the search area of the “genome viewer” My Image

rs110031217 My Image

rs110991998 My Image

rs42089058 My Image

rs41644634 My Image

rs41567074 My Image

ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.

12 GALLO

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt", 
                   head=T, stringsAsFactors = F)

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k_clean.csv")

Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html

Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 11 BTB-01060598 32301594 11 32278324 32766620 488297 - ENSBTAG00000046199 NRXN1 protein_coding
2 11 BTB-01060598 32301594 11 32280416 32280512 97 - ENSBTAG00000044573 U6 snRNA
3 22 ARS-BFGL-NGS-26419 46052082 22 45924535 46818511 893977 - ENSBTAG00000013117 CACNA2D3 protein_coding
4 22 ARS-BFGL-NGS-26419 46052082 22 46051998 46062657 10660 + ENSBTAG00000013124 LRTM1 protein_coding
5 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding
6 26 BTB-00926636 17857480 26 17703169 17846407 143239 - ENSBTAG00000011743 TLL2 protein_coding
7 26 BTB-00926636 17857480 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
8 26 BTA-62125-no-rs 17899619 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
9 26 BTA-62125-no-rs 17899619 26 17925643 18029878 104236 - ENSBTAG00000019872 PIK3AP1 protein_coding
10 28 BTA-99379-no-rs 41666186 28 41609015 41649016 40002 - ENSBTAG00000007540 GLUD1 protein_coding
11 28 BTA-99379-no-rs 41666186 28 41650333 41763371 113039 + ENSBTAG00000019194 SHLD2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
2 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
3 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601
4 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51802
5 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51803
6 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51804
7 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51805
8 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51806
9 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51807
10 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51808
11 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51809
12 22 ARS-BFGL-NGS-26419 46052082 Health 46052080 46052084 51810
13 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51811
14 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51812
15 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51813
16 22 ARS-BFGL-NGS-26419 46052082 Milk 46073517 46073521 243365
17 26 BTB-00926636 17857480 Milk 17809768 17809772 195264
18 26 BTB-00926636 17857480 Milk 17809768 17809772 200269
19 26 BTB-00926636 17857480 Milk 17809768 17809772 200270
20 26 BTB-00926636 17857480 Milk 17809768 17809772 205295
21 26 BTB-00926636 17857480 Milk 17810363 17810367 198859
22 26 BTB-00926636 17857480 Milk 17810363 17810367 203868
23 26 BTB-00926636 17857480 Milk 17810363 17810367 203869
24 26 BTB-00926636 17857480 Milk 17810363 17810367 210297
25 26 BTB-00926636 17857480 Milk 17811021 17811025 198858
26 26 BTB-00926636 17857480 Milk 17811021 17811025 203866
27 26 BTB-00926636 17857480 Milk 17811021 17811025 203867
28 26 BTB-00926636 17857480 Milk 17811021 17811025 210296
29 26 BTB-00926636 17857480 Milk 17811725 17811729 197493
30 26 BTB-00926636 17857480 Milk 17811725 17811729 202704
31 26 BTB-00926636 17857480 Milk 17812273 17812277 195685
32 26 BTB-00926636 17857480 Milk 17812273 17812277 200785
33 26 BTB-00926636 17857480 Milk 17813212 17813216 195285
34 26 BTB-00926636 17857480 Milk 17813212 17813216 200301
35 26 BTB-00926636 17857480 Milk 17813978 17813982 196236
36 26 BTB-00926636 17857480 Milk 17813978 17813982 201468
37 26 BTB-00926636 17857480 Milk 17815237 17815241 198002
38 26 BTB-00926636 17857480 Milk 17815237 17815241 203190
39 26 BTB-00926636 17857480 Milk 17817223 17817227 34750
40 26 BTB-00926636 17857480 Milk 17817223 17817227 195586
41 26 BTB-00926636 17857480 Milk 17821297 17821301 197235
42 26 BTB-00926636 17857480 Milk 17821297 17821301 202455
43 26 BTB-00926636 17857480 Milk 17822209 17822213 197783
44 26 BTB-00926636 17857480 Milk 17822209 17822213 202983
45 26 BTB-00926636 17857480 Milk 17823105 17823109 196204
46 26 BTB-00926636 17857480 Milk 17823105 17823109 201419
47 26 BTB-00926636 17857480 Milk 17824770 17824774 196867
48 26 BTB-00926636 17857480 Milk 17824770 17824774 202101
49 26 BTB-00926636 17857480 Milk 17833551 17833555 195748
50 26 BTB-00926636 17857480 Milk 17833551 17833555 200855
51 26 BTB-00926636 17857480 Milk 17833551 17833555 206213
52 26 BTB-00926636 17857480 Milk 17865069 17865073 197144
53 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 197144
54 26 BTB-00926636 17857480 Milk 17865069 17865073 202367
55 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 202367
56 26 BTB-00926636 17857480 Milk 17865069 17865073 208485
57 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 208485
58 26 BTB-00926636 17857480 Milk 17871201 17871205 197611
59 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 197611
60 26 BTB-00926636 17857480 Milk 17871201 17871205 202816
61 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 202816
62 26 BTB-00926636 17857480 Milk 17871201 17871205 209076
63 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 209076
64 26 BTB-00926636 17857480 Milk 17885871 17885875 63656
65 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 63656
66 26 BTB-00926636 17857480 Milk 17885871 17885875 199068
67 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 199068
68 26 BTB-00926636 17857480 Milk 17885871 17885875 204113
69 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204113
70 26 BTB-00926636 17857480 Milk 17885871 17885875 204114
71 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204114
72 26 BTB-00926636 17857480 Milk 17885871 17885875 210399
73 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 210399
74 26 BTB-00926636 17857480 Milk 17895273 17895277 63657
75 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 63657
76 26 BTB-00926636 17857480 Milk 17895273 17895277 199069
77 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 199069
78 26 BTB-00926636 17857480 Milk 17895273 17895277 204115
79 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204115
80 26 BTB-00926636 17857480 Milk 17895273 17895277 204116
81 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204116
82 26 BTB-00926636 17857480 Milk 17895273 17895277 210400
83 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 210400
84 26 BTB-00926636 17857480 Milk 17895803 17895807 199070
85 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 199070
86 26 BTB-00926636 17857480 Milk 17895803 17895807 204117
87 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204117
88 26 BTB-00926636 17857480 Milk 17895803 17895807 204118
89 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204118
90 26 BTB-00926636 17857480 Milk 17895803 17895807 210401
91 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 210401
92 26 BTB-00926636 17857480 Milk 17900304 17900308 199071
93 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 199071
94 26 BTB-00926636 17857480 Milk 17900304 17900308 204119
95 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204119
96 26 BTB-00926636 17857480 Milk 17900304 17900308 204120
97 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204120
98 26 BTB-00926636 17857480 Milk 17900304 17900308 210402
99 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 210402
100 26 BTB-00926636 17857480 Milk 17902932 17902936 63658
101 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 63658
102 26 BTB-00926636 17857480 Milk 17902932 17902936 199072
103 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 199072
104 26 BTB-00926636 17857480 Milk 17902932 17902936 204121
105 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204121
106 26 BTB-00926636 17857480 Milk 17902932 17902936 204122
107 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204122
108 26 BTB-00926636 17857480 Milk 17902932 17902936 210403
109 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 210403
110 26 BTB-00926636 17857480 Milk 17906861 17906865 63660
111 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 63660
112 26 BTB-00926636 17857480 Milk 17906861 17906865 199074
113 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 199074
114 26 BTB-00926636 17857480 Milk 17906861 17906865 204125
115 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204125
116 26 BTB-00926636 17857480 Milk 17906861 17906865 204126
117 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204126
118 26 BTB-00926636 17857480 Milk 17906861 17906865 210405
119 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 210405
120 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 63654
121 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 199076
122 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204129
123 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204130
124 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 210407
125 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 63655
126 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 197826
127 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203026
128 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203027
129 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 209354
130 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 195860
131 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201008
132 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201009
133 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 206462
134 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 33984
135 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195668
136 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195669
137 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195670
138 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 200758
139 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 199092
140 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 204151
141 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107048
142 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107227

12.1 QTL annotation on GALLO

QTL type

#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)

My Image

QTL type

#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

Reproduction My Image

Health My Image

12.2 QTL enrichment on GALLO

#GALLO
#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_name.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_type.csv")

#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
5 Milk C14 index 35 4437 108 163224 0.0000000 0.0000000 Milk
9 Milk myristoleic acid content 26 3047 108 163224 0.0000000 0.0000000 Milk
10 Milk palmitoleic acid content 15 2422 108 163224 0.0000000 0.0000000 Milk
6 Milk C16 index 12 2002 108 163224 0.0000000 0.0000001 Milk
12 Muscle carnosine content 3 67 108 163224 0.0000131 0.0000497 Meat and Carcass
14 Pregnancy rate 2 944 108 163224 0.1296627 0.4105985 Reproduction
18 Teat length 1 300 108 163224 0.1802436 0.4892327 Exterior
15 Rear leg placement - side view 1 430 108 163224 0.2479752 0.5235032 Exterior
17 Stillbirth 2 1363 108 163224 0.2280294 0.5235032 Reproduction
3 Foot angle 1 672 108 163224 0.3596272 0.6162877 Exterior
7 Milk capric acid content 1 912 108 163224 0.4541067 0.6162877 Milk
8 Milk myristic acid content 1 902 108 163224 0.4504612 0.6162877 Milk
13 Net merit 1 903 108 163224 0.4508269 0.6162877 Production
19 Udder depth 1 695 108 163224 0.3693423 0.6162877 Exterior
16 Somatic cell score 1 1122 108 163224 0.5253603 0.6654564 Health
2 Conception rate 1 1255 108 163224 0.5656375 0.6716945 Reproduction
1 Calving ease 2 3819 108 163224 0.7219243 0.7776744 Reproduction
4 Length of productive life 1 2004 108 163224 0.7367441 0.7776744 Production
11 Milk yield 1 6432 108 163224 0.9870080 0.9870080 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 4 9077 108 163224 0.8569775 0.9999953
2 Health 1 5889 108 163224 0.9811250 0.9999953
3 Meat and Carcass 3 18258 108 163224 0.9997109 0.9999953
4 Milk 91 75352 108 163224 0.0000000 0.0000000
5 Production 2 19640 108 163224 0.9999848 0.9999953
6 Reproduction 7 35008 108 163224 0.9999953 0.9999953

My Image

13 GPROFILER

online version: https://biit.cs.ut.ee/gprofiler/gost

I got a script from Julia Rodrigues about the R package GPROFILER to perform an enrichment of my Genes.

### enriquecimento genico
#install.packages("gprofiler2")
library(gprofiler2)

#Para conferir a lista de organism -> https://biit.cs.ut.ee/gprofiler/page/organism-list

#Obs: eu entro com os ids ENSOAR...
query <- read.table ("/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt", header = T)
query <- query[,c("gene_id")]

gene_enrich <- gost(
  query,
  organism = "btaurus", 
  ordered_query = FALSE,
  multi_query = FALSE,
  significant = TRUE,
  exclude_iea = FALSE,
  measure_underrepresentation = FALSE,
  evcodes = TRUE,
  user_threshold = 0.05,
  correction_method = c("fdr"),
  domain_scope = c("annotated"),
  numeric_ns = "",
  sources = NULL,
  as_short_link = FALSE,
  highlight = FALSE
)


#str(gene_enrich) # para ver o formato dos meus dados

#selecionando apenas as informações da lista que me interessam para fazer meu data.frame 
result_enrich <- data.frame(gene_enrich$result)
result_enrich <- data.frame(Category = result_enrich$source,
                            ID = result_enrich$term_id,
                            Term = result_enrich$term_name,
                            adj_pvalue = result_enrich$p_value,
                            id_ensembl = result_enrich$intersection)

write.table(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=F)
write.csv(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.csv")

Below we can se the significant terms of this enrichment (output):

X Category ID Term adj_pvalue id_ensembl
1 GO:BP GO:0006538 glutamate catabolic process 0.0383792 ENSBTAG00000007540
2 GO:BP GO:0003274 endocardial cushion fusion 0.0383792 ENSBTAG00000019832
3 GO:BP GO:1905005 regulation of epithelial to mesenchymal transition involved in endocardial cushion formation 0.0383792 ENSBTAG00000019832
4 GO:BP GO:1905007 positive regulation of epithelial to mesenchymal transition involved in endocardial cushion formation 0.0383792 ENSBTAG00000019832
5 GO:BP GO:0003430 growth plate cartilage chondrocyte growth 0.0383792 ENSBTAG00000019832
6 GO:BP GO:0003431 growth plate cartilage chondrocyte development 0.0383792 ENSBTAG00000019832
7 GO:BP GO:0003433 chondrocyte development involved in endochondral bone morphogenesis 0.0383792 ENSBTAG00000019832
8 GO:BP GO:0003415 chondrocyte hypertrophy 0.0383792 ENSBTAG00000019832
9 GO:BP GO:0048631 regulation of skeletal muscle tissue growth 0.0383792 ENSBTAG00000011743
10 GO:BP GO:0051138 positive regulation of NK T cell differentiation 0.0383792 ENSBTAG00000019832
11 GO:BP GO:0062043 positive regulation of cardiac epithelial to mesenchymal transition 0.0383792 ENSBTAG00000019832
12 GO:BP GO:0051136 regulation of NK T cell differentiation 0.0383792 ENSBTAG00000019832
13 GO:BP GO:0003186 tricuspid valve morphogenesis 0.0383792 ENSBTAG00000019832
14 GO:BP GO:0062042 regulation of cardiac epithelial to mesenchymal transition 0.0383792 ENSBTAG00000019832
15 GO:BP GO:0060434 bronchus morphogenesis 0.0383792 ENSBTAG00000019832
16 GO:BP GO:0060440 trachea formation 0.0383792 ENSBTAG00000019832
17 GO:BP GO:0060462 lung lobe development 0.0383792 ENSBTAG00000019832
18 GO:BP GO:0060463 lung lobe morphogenesis 0.0383792 ENSBTAG00000019832
19 GO:BP GO:0048632 negative regulation of skeletal muscle tissue growth 0.0383792 ENSBTAG00000011743
20 GO:BP GO:0003175 tricuspid valve development 0.0383792 ENSBTAG00000019832
21 GO:BP GO:0061520 Langerhans cell differentiation 0.0383792 ENSBTAG00000019832
22 GO:BP GO:1905317 inferior endocardial cushion morphogenesis 0.0383792 ENSBTAG00000019832
23 GO:BP GO:2000563 positive regulation of CD4-positive, alpha-beta T cell proliferation 0.0383792 ENSBTAG00000019832
24 GO:BP GO:1990428 miRNA transport 0.0383792 ENSBTAG00000019832
25 GO:BP GO:0002513 tolerance induction to self antigen 0.0383792 ENSBTAG00000019832
26 GO:BP GO:0002514 B cell tolerance induction 0.0383792 ENSBTAG00000019832
27 GO:BP GO:0003149 membranous septum morphogenesis 0.0383792 ENSBTAG00000019832
28 GO:BP GO:1990086 lens fiber cell apoptotic process 0.0383792 ENSBTAG00000019832
29 GO:BP GO:0002520 immune system development 0.0383792 ENSBTAG00000019832,ENSBTAG00000019194
30 GO:BP GO:0002666 positive regulation of T cell tolerance induction 0.0383792 ENSBTAG00000019832
31 GO:BP GO:0002651 positive regulation of tolerance induction to self antigen 0.0383792 ENSBTAG00000019832
32 GO:BP GO:0061343 cell adhesion involved in heart morphogenesis 0.0383792 ENSBTAG00000019832
33 GO:BP GO:0002649 regulation of tolerance induction to self antigen 0.0383792 ENSBTAG00000019832
34 GO:BP GO:0002661 regulation of B cell tolerance induction 0.0383792 ENSBTAG00000019832
35 GO:BP GO:0002664 regulation of T cell tolerance induction 0.0383792 ENSBTAG00000019832
36 GO:BP GO:0002663 positive regulation of B cell tolerance induction 0.0383792 ENSBTAG00000019832
37 GO:BP GO:0002684 positive regulation of immune system process 0.0428818 ENSBTAG00000019832,ENSBTAG00000019872,ENSBTAG00000019194
38 GO:BP GO:0048630 skeletal muscle tissue growth 0.0431888 ENSBTAG00000011743
39 GO:BP GO:0051251 positive regulation of lymphocyte activation 0.0431888 ENSBTAG00000019832,ENSBTAG00000019194
40 GO:BP GO:0034154 toll-like receptor 7 signaling pathway 0.0431888 ENSBTAG00000019872
41 GO:BP GO:0060439 trachea morphogenesis 0.0431888 ENSBTAG00000019832
42 GO:BP GO:0002517 T cell tolerance induction 0.0431888 ENSBTAG00000019832
43 GO:BP GO:0002645 positive regulation of tolerance induction 0.0431888 ENSBTAG00000019832
44 GO:BP GO:0060433 bronchus development 0.0460331 ENSBTAG00000019832
45 GO:BP GO:0003418 growth plate cartilage chondrocyte differentiation 0.0460331 ENSBTAG00000019832
46 GO:BP GO:0002696 positive regulation of leukocyte activation 0.0482941 ENSBTAG00000019832,ENSBTAG00000019194
47 GO:BP GO:0001865 NK T cell differentiation 0.0489635 ENSBTAG00000019832
48 GO:BP GO:0050867 positive regulation of cell activation 0.0496277 ENSBTAG00000019832,ENSBTAG00000019194
49 GO:BP GO:0003198 epithelial to mesenchymal transition involved in endocardial cushion formation 0.0496277 ENSBTAG00000019832
50 GO:BP GO:0002643 regulation of tolerance induction 0.0496277 ENSBTAG00000019832
51 GO:BP GO:2001034 positive regulation of double-strand break repair via nonhomologous end joining 0.0496277 ENSBTAG00000019194
52 GO:MF GO:0004352 glutamate dehydrogenase (NAD+) activity 0.0098128 ENSBTAG00000007540
53 GO:MF GO:0004353 glutamate dehydrogenase [NAD(P)+] activity 0.0098128 ENSBTAG00000007540
54 GO:MF GO:0016639 oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor 0.0098128 ENSBTAG00000007540
55 GO:MF GO:0005026 transforming growth factor beta receptor activity, type II 0.0147171 ENSBTAG00000019832
56 GO:MF GO:0048185 activin binding 0.0326677 ENSBTAG00000019832
57 GO:MF GO:0036312 phosphatidylinositol 3-kinase regulatory subunit binding 0.0326677 ENSBTAG00000019872
58 GO:MF GO:0034713 type I transforming growth factor beta receptor binding 0.0326677 ENSBTAG00000019832
59 GO:MF GO:0017002 activin receptor activity 0.0326677 ENSBTAG00000019832
60 GO:MF GO:0005024 transforming growth factor beta receptor activity 0.0326677 ENSBTAG00000019832
61 GO:MF GO:0050431 transforming growth factor beta binding 0.0461234 ENSBTAG00000019832
62 GO:MF GO:0043548 phosphatidylinositol 3-kinase binding 0.0461234 ENSBTAG00000019872
63 GO:MF GO:0016638 oxidoreductase activity, acting on the CH-NH2 group of donors 0.0461234 ENSBTAG00000007540
64 GO:MF GO:0005160 transforming growth factor beta receptor binding 0.0461234 ENSBTAG00000019832
65 GO:MF GO:0004675 transmembrane receptor protein serine/threonine kinase activity 0.0461234 ENSBTAG00000019832
66 REAC REAC:R-BTA-2243919 Crosslinking of collagen fibrils 0.0440084 ENSBTAG00000011743
67 REAC REAC:R-BTA-2173788 Downregulation of TGF-beta receptor signaling 0.0440084 ENSBTAG00000019832
68 REAC REAC:R-BTA-8964539 Glutamate and glutamine metabolism 0.0440084 ENSBTAG00000007540
69 REAC REAC:R-BTA-112308 Presynaptic depolarization and calcium channel opening 0.0440084 ENSBTAG00000013117
70 REAC REAC:R-BTA-2151201 Transcriptional activation of mitochondrial biogenesis 0.0440084 ENSBTAG00000007540
71 REAC REAC:R-BTA-2173791 TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) 0.0440084 ENSBTAG00000019832

13.1 GPROFILER ON-LINE

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

Additionaly I perfomed a another version of this analysis but with no eletronic GO annotation

My Image

My Image

13.2 Terms distribution Pie Chart

Now I’m gonna make a pie chart with the categories

library(ggplot2)

# Assuming sig_enrich$Category contains the categories
category_counts <- table(result_enrich$Category)

# Create a pie chart
pie_chart <- ggplot(data = NULL, aes(x = factor(1), fill = names(category_counts), y = as.numeric(category_counts))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Distribution among terms",
       x = NULL,
       y = NULL,
       fill = "Category") +
  theme_void() +
  theme(legend.position = "right")

# Print the pie chart
print(pie_chart)

My Image

14 Haplotype Blocks

First I need to generate the .ped file

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file

plink \
    --bfile ${DATA} \
    --cow \
    --recode \
    --out ${RESULT}

Second, is necessary to convert the .ped file to Linkage format:

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in

plink \
    --file ${DATA} \
    --cow \
    --recode HV \
    --out ${RESULT}

The code above will generate a pair of files .ped and .info for each chromosome

I ran the haploview only for chromosome 11, 22, 26, 28 that are when the significant SNP are located.

As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot

My Image

Chr SNP bp A1 A2 Freq Haploview.order
11 BTB-01060598 32301594 C A 0.4503970 680
22 ARS-BFGL-NGS-23411 5134078 A C 0.1865080 88
22 ARS-BFGL-NGS-26419 46052082 A C 0.0813492 762
26 BTB-00926636 17857480 C A 0.0257937 296
26 BTA-62125-no-rs 17899619 C A 0.0257937 297
28 BTA-99379-no-rs 41666186 A C 0.0496032 701

The two significant SNPs from chromosome 26 fell within a Haploblock.

Chr 11 (SNP 680) My Image

Chr 22 (SNP 88) My Image

Chr 22 (SNP 762) My Image

Chr 26 (SNP 296 and 297) My Image

Chr 28 (SNP 701) My Image

14.1 BLOCK 10 in Chr 26

As we found an haploblock on chromosome 26 I decided take a look inside this block. So, on Haploview I checked the name and position of the first SNP of this block (292) and the last (300).

X. Name Position ObsHET PredHET HWpval X.Geno FamTrio MendErr MAF Alleles
292 ARS-BFGL-NGS-111577 17714604 0.401 0.448 0.1161 100 0 0 0.339 A:C
293 ARS-BFGL-NGS-21408 17746468 0.369 0.403 0.2260 100 0 0 0.280 C:A
294 ARS-BFGL-NGS-16328 17778908 0.056 0.054 1.0000 100 0 0 0.028 A:C
295 ARS-BFGL-NGS-117063 17817225 0.413 0.452 0.2022 100 0 0 0.345 C:A
296 BTB-00926636 17857480 0.052 0.050 1.0000 100 0 0 0.026 A:C
297 BTA-62125-no-rs 17899619 0.052 0.050 1.0000 100 0 0 0.026 A:C
298 ARS-BFGL-NGS-110679 17953795 0.397 0.433 0.2211 100 0 0 0.317 A:C
299 ARS-BFGL-NGS-72889 17989430 0.052 0.050 1.0000 100 0 0 0.026 C:A
300 BTB-00926868 18040673 0.385 0.420 0.2319 100 0 0 0.300 A:C

Then I checked on NCBI’s Genome Data Viewer which genes are inside this block, adding 50k before the first SNP and 50K after the last SNP.

This interval has 426,069bp and has this genes inside:

  • DNTT
  • OPALIN
  • TLL2
  • TM9SF3
  • PIK3AP1

As you can see in the image below:

My Image

The genes TLL2, TM9SF3 and PIK3AP1 already were in the gene list from GALLO, but the genes DNTT and OPALIN were new!

I performed a GPROFILER analysis only for these genes spotted in this Block 10.

My Image

15 5 - 15%

After check the outcome for 5% of significance for SNPs, Prof. Flávio asked me to carry out again considering the interval between 5% and 15% of significance.

15.1 Corrected Bonferroni for genome independent segments

Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma", 
                  head=T, stringsAsFactors = F)

gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}

bonf05<- -log10(0.05/Me)
bonf15<- -log10(0.15/Me)

library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20, 
               cex=1, lwd=1, ylab="", xlab="Chromossomes", 
               main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf05), col="black")
abline(h=(bonf15), col="red2")
legend("topleft", inset=c(0.05, 0.05), col="red2", lwd=2, 
       legend=c("15% sig. Bonferroni corr. for ind. segments"), bty="n")
legend("topleft", inset=c(0.05, 0.10), col="black", lwd=2, 
       legend=c("5% sig. Bonferroni corr. for ind. segments"), bty="n")

library(tidyverse)

#Recovering the significant SNPs > <5%
SNP_sig_bonf_ind_seg_5 <- filter(gwas, logP > bonf05)
write.csv(SNP_sig_bonf_ind_seg_5, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5.csv")


#Recovering the significant SNPs > 15% and <5%
SNP_sig_bonf_ind_seg_5_15 <- filter(gwas, logP > bonf15 & logP < bonf05)
write.csv(SNP_sig_bonf_ind_seg_5_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5_15.csv")


#Recovering the significant SNPs > 15% 
SNP_sig_bonf_ind_seg_15 <- filter(gwas, logP > bonf15)
write.csv(SNP_sig_bonf_ind_seg_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")

My Image

Below we can find the list of significant SNPs after the correction for Multiple Testing

Chr X X15. X5. X5.15. logP
28 BTA-99379-no-rs X X 6.610970
22 ARS-BFGL-NGS-23411 X X 5.485541
22 ARS-BFGL-NGS-26419 X X 5.272074
26 BTA-62125-no-rs X X 4.732155
26 BTB-00926636 X X 4.732155
11 BTB-01060598 X X 4.682532
20 BTB-01471673 X X 4.182974
23 Hapmap54016-rs29022819 X X 4.016453
28 ARS-BFGL-NGS-11935 X X 3.986177
28 Hapmap43005-BTA-64606 X X 3.986177
11 BTA-122098-no-rs X X 3.980028
19 ARS-BFGL-NGS-29119 X X 3.925461
14 Hapmap39823-BTA-35254 X X 3.905277
19 ARS-BFGL-NGS-28901 X X 3.888442
3 ARS-BFGL-NGS-4585 X X 3.885943
14 UA-IFASA-7664 X X 3.882503

To make easier to check the name of the significant SNPs I made the following Manhattan Plot

# Prepare the dataset
don <- gwas %>% 
  group_by(Chr) %>% 
  summarise(chr_len = max(bp)) %>% 
  mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
  select(-chr_len) %>%
  left_join(gwas, ., by = c("Chr" = "Chr")) %>%
  arrange(Chr, bp) %>%
  mutate(BPcum = as.numeric(bp) + tot) %>%
  mutate(is_highlight = ifelse(logP > bonf15, "yes", "no")) %>%
  mutate(is_annotate = ifelse(logP > bonf15, "yes", "no"))

# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))

# Prepare X axis
axisdf <- don %>% 
  group_by(Chr) %>% 
  summarize(center = (max(BPcum) + min(BPcum)) / 2)

# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)

# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
  geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
  scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
  scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
  geom_hline(yintercept = bonf05, color = "black") +  # Add horizontal line for bonf05
  geom_hline(yintercept = bonf15, color = "red2") +   # Add horizontal line for bonf15
  geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
  geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
  annotate("text", x = Inf, y = Inf, label = "15% sig. Bonferroni corr. for ind. segments", 
           color = "red2", hjust = 1.1, vjust = 2, size = 3) +
  annotate("text", x = Inf, y = Inf, label = "5% sig. Bonferroni corr. for ind. segments", 
           color = "black", hjust = 1.1, vjust = 3.5, size = 3) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")

My Image

Generating a file with the GWAS output and rsID

library(tidyverse)

# Extracting SNP names to use in the SNPChimp
SNP_NAMES <- SNP_sig_bonf_ind_seg_15 %>% pull(SNP)
SNP_NAMES <- paste(SNP_NAMES, collapse = ", ")
print(SNP_NAMES)

# Copy this printed names and paste on SNPChimp

# Opening the SNPChimp output in R
rsID <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/SNPchimp_result_ind_seg_15.tsv")
rsID <- rsID[,c("rs","SNP_name")]

#Merging in one file the GWAS output and rsID
SNP_rsID_sig_bonf_ind_seg_15 <- merge(SNP_sig_bonf_ind_seg_15, rsID, by.x = "SNP", by.y = "SNP_name")
SNP_rsID_sig_bonf_ind_seg_15 <- SNP_rsID_sig_bonf_ind_seg_15[, c("rs", "SNP", "Chr", "bp", "A1", "A2", "Freq", "b", "se", "p", "logP")]
write.csv(SNP_rsID_sig_bonf_ind_seg_15, "/home/bambrozi/2_CORTISOL/GWAS/SNP_rsID_bonf_ind_seg_15.csv")

Bellow we can see the table with the SNP names and rsID for all significant SNPs with p-value lower than 0,15.

X rs SNP Chr bp A1 A2 Freq b se p logP
1 rs109578343 ARS-BFGL-NGS-11935 28 44942879 A C 0.0297619 412.261 106.1750 0.0001032 3.986177
2 rs110031217 ARS-BFGL-NGS-23411 22 5134078 A C 0.1865080 207.116 44.5110 0.0000033 5.485541
3 rs110991998 ARS-BFGL-NGS-26419 22 46052082 A C 0.0813492 308.357 67.7592 0.0000053 5.272074
4 rs109751680 ARS-BFGL-NGS-28901 19 42338148 C A 0.3710320 141.156 36.8765 0.0001293 3.888442
5 rs41577586 ARS-BFGL-NGS-29119 19 49744592 A C 0.4047620 -142.084 36.9169 0.0001187 3.925461
6 rs109888380 ARS-BFGL-NGS-4585 3 107160292 A C 0.3353170 -144.488 37.7607 0.0001300 3.885943
7 rs41624254 BTA-122098-no-rs 11 31882807 A C 0.4444440 131.779 33.9688 0.0001047 3.980028
8 rs41644634 BTA-62125-no-rs 26 17899619 C A 0.0257937 487.149 113.7690 0.0000185 4.732155
9 rs41567074 BTA-99379-no-rs 28 41666186 A C 0.0496032 435.809 84.4338 0.0000002 6.610970
10 rs42089058 BTB-00926636 26 17857480 C A 0.0257937 487.149 113.7690 0.0000185 4.732155
11 rs42217767 BTB-01060598 11 32301594 C A 0.4503970 -152.746 35.8860 0.0000208 4.682532
12 rs42590312 BTB-01471673 20 11545685 A C 0.2460320 169.707 42.5157 0.0000656 4.182974
13 rs41632193 Hapmap39823-BTA-35254 14 64061208 A C 0.2916670 -138.029 35.9698 0.0001244 3.905277
14 rs41648479 Hapmap43005-BTA-64606 28 45805105 A C 0.0297619 412.261 106.1750 0.0001032 3.986177
15 rs29022819 Hapmap54016-rs29022819 23 33609998 C A 0.1904760 173.228 44.4201 0.0000963 4.016453
16 rs41632223 UA-IFASA-7664 14 64299714 C A 0.2678570 -141.443 36.9840 0.0001311 3.882503

16 qqPlot

17 VEP - Variant Effect Prediction

After insert all rsID on VEP the summary is shown below:

My Image

As can be seen, the VEP only analysed 14 variants

Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:

rs41648479 My Image

rs41632223 My Image

rs41632193 My Image

rs42590312 My Image

rs42089058 My Image

rs41567074 My Image

rs41644634 My Image

rs41624254 My Image

rs109888380 My Image

rs41577586 My Image

rs109751680 My Image

rs110991998 My Image

rs110031217 My Image

rs109578343 My Image

ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.

18 GALLO

# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")

# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))


colnames(gwas) <- c("CHR","SNP", "BP")


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_15.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_clean_15.csv")

Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html

Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index

The GALLO output are bellow:

For GENES

X CHR SNP BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 3 ARS-BFGL-NGS-4585 107160292 3 107109148 107110689 1542 + ENSBTAG00000030338 GJA9 protein_coding
2 3 ARS-BFGL-NGS-4585 107160292 3 107111822 107117831 6010 + ENSBTAG00000030337 MYCBP protein_coding
3 3 ARS-BFGL-NGS-4585 107160292 3 107121920 107136338 14419 + ENSBTAG00000009368 RRAGC protein_coding
4 11 BTB-01060598 32301594 11 32278324 32766620 488297 - ENSBTAG00000046199 NRXN1 protein_coding
5 11 BTB-01060598 32301594 11 32280416 32280512 97 - ENSBTAG00000044573 U6 snRNA
6 14 Hapmap39823-BTA-35254 64061208 14 64047857 64103385 55529 + ENSBTAG00000017833 RNF19A protein_coding
7 14 UA-IFASA-7664 64299714 14 64289853 64398324 108472 + ENSBTAG00000019793 RGS22 protein_coding
8 19 ARS-BFGL-NGS-29119 49744592 19 49662140 49766195 104056 + ENSBTAG00000052072 B3GNTL1 protein_coding
9 19 ARS-BFGL-NGS-29119 49744592 19 49768681 49927209 158529 - ENSBTAG00000015414 TBCD protein_coding
10 19 ARS-BFGL-NGS-28901 42338148 19 42283857 42299780 15924 - ENSBTAG00000001195 KCNH4 protein_coding
11 19 ARS-BFGL-NGS-28901 42338148 19 42302320 42303571 1252 - ENSBTAG00000000665 HCRT protein_coding
12 19 ARS-BFGL-NGS-28901 42338148 19 42307188 42312293 5106 - ENSBTAG00000000666 GHDC protein_coding
13 19 ARS-BFGL-NGS-28901 42338148 19 42319170 42357910 38741 - ENSBTAG00000010125 STAT5B protein_coding
14 19 ARS-BFGL-NGS-28901 42338148 19 42387164 42390885 3722 + ENSBTAG00000052763 NA lncRNA
15 22 ARS-BFGL-NGS-26419 46052082 22 45924535 46818511 893977 - ENSBTAG00000013117 CACNA2D3 protein_coding
16 22 ARS-BFGL-NGS-26419 46052082 22 46051998 46062657 10660 + ENSBTAG00000013124 LRTM1 protein_coding
17 22 ARS-BFGL-NGS-23411 5134078 22 5091037 5184321 93285 + ENSBTAG00000019832 TGFBR2 protein_coding
18 23 Hapmap54016-rs29022819 33609998 23 33618285 33618377 93 + ENSBTAG00000045279 SNORA70 snoRNA
19 26 BTB-00926636 17857480 26 17703169 17846407 143239 - ENSBTAG00000011743 TLL2 protein_coding
20 26 BTB-00926636 17857480 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
21 26 BTA-62125-no-rs 17899619 26 17851539 17912665 61127 - ENSBTAG00000016298 TM9SF3 protein_coding
22 26 BTA-62125-no-rs 17899619 26 17925643 18029878 104236 - ENSBTAG00000019872 PIK3AP1 protein_coding
23 28 BTA-99379-no-rs 41666186 28 41609015 41649016 40002 - ENSBTAG00000007540 GLUD1 protein_coding
24 28 BTA-99379-no-rs 41666186 28 41650333 41763371 113039 + ENSBTAG00000019194 SHLD2 protein_coding
25 28 Hapmap43005-BTA-64606 45805105 28 45757443 45768564 11122 + ENSBTAG00000012393 AGT protein_coding
26 28 Hapmap43005-BTA-64606 45805105 28 45772443 45814853 42411 - ENSBTAG00000020314 COG2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40198
2 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40199
3 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40200
4 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40201
5 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40202
6 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40203
7 3 ARS-BFGL-NGS-4585 107160292 Production 107191525 107191529 40204
8 3 ARS-BFGL-NGS-4585 107160292 Production 107191525 107191529 40205
9 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40206
10 3 ARS-BFGL-NGS-4585 107160292 Milk 107191525 107191529 40207
11 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40208
12 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40209
13 3 ARS-BFGL-NGS-4585 107160292 Health 107191525 107191529 40210
14 3 ARS-BFGL-NGS-4585 107160292 Reproduction 107191525 107191529 40211
15 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40212
16 3 ARS-BFGL-NGS-4585 107160292 Exterior 107191525 107191529 40213
17 11 BTA-122098-no-rs 31882807 Milk 31870044 31870048 122275
18 14 Hapmap39823-BTA-35254 64061208 Milk 64013785 64013789 247433
19 14 Hapmap39823-BTA-35254 64061208 Milk 64013858 64013862 241580
20 14 Hapmap39823-BTA-35254 64061208 Milk 64013858 64013862 252031
21 14 Hapmap39823-BTA-35254 64061208 Milk 64013897 64013901 241079
22 14 Hapmap39823-BTA-35254 64061208 Milk 64013897 64013901 251434
23 14 Hapmap39823-BTA-35254 64061208 Milk 64013952 64013956 241853
24 14 Hapmap39823-BTA-35254 64061208 Milk 64013952 64013956 252403
25 14 Hapmap39823-BTA-35254 64061208 Milk 64015726 64015730 105628
26 14 Hapmap39823-BTA-35254 64061208 Milk 64017424 64017428 252151
27 14 Hapmap39823-BTA-35254 64061208 Milk 64017970 64017974 18873
28 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 18872
29 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 105335
30 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 173792
31 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 175101
32 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 176088
33 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 176487
34 14 Hapmap39823-BTA-35254 64061208 Milk 64061206 64061210 252723
35 14 Hapmap39823-BTA-35254 64061208 Milk 64063422 64063426 175102
36 14 Hapmap39823-BTA-35254 64061208 Milk 64068329 64068333 105336
37 14 Hapmap39823-BTA-35254 64061208 Milk 64081490 64081494 104637
38 14 Hapmap39823-BTA-35254 64061208 Milk 64081490 64081494 104890
39 14 Hapmap39823-BTA-35254 64061208 Health 64081490 64081494 179830
40 14 Hapmap39823-BTA-35254 64061208 Milk 64087234 64087238 175103
41 14 Hapmap39823-BTA-35254 64061208 Milk 64087234 64087238 252869
42 14 Hapmap39823-BTA-35254 64061208 Milk 64087450 64087454 252628
43 14 Hapmap39823-BTA-35254 64061208 Milk 64088569 64088573 252926
44 14 Hapmap39823-BTA-35254 64061208 Milk 64103583 64103587 104638
45 14 Hapmap39823-BTA-35254 64061208 Milk 64103583 64103587 104891
46 14 UA-IFASA-7664 64299714 Milk 64272869 64272873 102476
47 14 UA-IFASA-7664 64299714 Milk 64272869 64272873 104484
48 14 UA-IFASA-7664 64299714 Milk 64272869 64272873 104863
49 14 UA-IFASA-7664 64299714 Health 64272869 64272873 179764
50 14 UA-IFASA-7664 64299714 Milk 64283263 64283267 175108
51 14 UA-IFASA-7664 64299714 Milk 64283263 64283267 251800
52 14 UA-IFASA-7664 64299714 Milk 64283947 64283951 251398
53 14 UA-IFASA-7664 64299714 Milk 64288968 64288972 247633
54 14 UA-IFASA-7664 64299714 Milk 64288968 64288972 252142
55 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 18875
56 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 105660
57 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 158137
58 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 173793
59 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 175109
60 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 176085
61 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 176486
62 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 250009
63 14 UA-IFASA-7664 64299714 Milk 64299712 64299716 252724
64 14 UA-IFASA-7664 64299714 Milk 64302757 64302761 251934
65 14 UA-IFASA-7664 64299714 Milk 64308608 64308612 252948
66 14 UA-IFASA-7664 64299714 Milk 64317001 64317005 252652
67 14 UA-IFASA-7664 64299714 Milk 64322496 64322500 251752
68 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20269
69 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20311
70 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20393
71 14 UA-IFASA-7664 64299714 Meat_and_Carcass 64327478 64327482 20464
72 14 UA-IFASA-7664 64299714 Milk 64327904 64327908 32596
73 14 UA-IFASA-7664 64299714 Milk 64327904 64327908 32634
74 14 UA-IFASA-7664 64299714 Milk 64328322 64328326 252206
75 14 UA-IFASA-7664 64299714 Milk 64339118 64339122 251497
76 14 UA-IFASA-7664 64299714 Milk 64343288 64343292 252287
77 19 ARS-BFGL-NGS-28901 42338148 Milk 42292941 42292945 147657
78 19 ARS-BFGL-NGS-28901 42338148 Milk 42292941 42292945 147675
79 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173073
80 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173074
81 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173075
82 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173076
83 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173077
84 19 ARS-BFGL-NGS-28901 42338148 Milk 42322679 42322683 173237
85 19 ARS-BFGL-NGS-28901 42338148 Milk 42338146 42338150 166545
86 19 ARS-BFGL-NGS-28901 42338148 Milk 42338146 42338150 166546
87 19 ARS-BFGL-NGS-28901 42338148 Milk 42338146 42338150 253605
88 19 ARS-BFGL-NGS-28901 42338148 Milk 42349650 42349654 147630
89 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 195323
90 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 195324
91 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 195325
92 19 ARS-BFGL-NGS-29119 49744592 Milk 49702578 49702582 205408
93 19 ARS-BFGL-NGS-29119 49744592 Meat_and_Carcass 49715846 49715850 107754
94 19 ARS-BFGL-NGS-29119 49744592 Milk 49735877 49735881 196362
95 19 ARS-BFGL-NGS-29119 49744592 Milk 49772086 49772090 195342
96 20 BTB-01471673 11545685 Production 11498161 11498165 65554
97 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5109498 5109502 151609
98 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5134076 5134080 151620
99 22 ARS-BFGL-NGS-23411 5134078 Meat_and_Carcass 5173197 5173201 151601
100 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51802
101 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51803
102 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51804
103 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51805
104 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51806
105 22 ARS-BFGL-NGS-26419 46052082 Production 46052080 46052084 51807
106 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51808
107 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51809
108 22 ARS-BFGL-NGS-26419 46052082 Health 46052080 46052084 51810
109 22 ARS-BFGL-NGS-26419 46052082 Reproduction 46052080 46052084 51811
110 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51812
111 22 ARS-BFGL-NGS-26419 46052082 Exterior 46052080 46052084 51813
112 22 ARS-BFGL-NGS-26419 46052082 Milk 46073517 46073521 243365
113 26 BTB-00926636 17857480 Milk 17809768 17809772 195264
114 26 BTB-00926636 17857480 Milk 17809768 17809772 200269
115 26 BTB-00926636 17857480 Milk 17809768 17809772 200270
116 26 BTB-00926636 17857480 Milk 17809768 17809772 205295
117 26 BTB-00926636 17857480 Milk 17810363 17810367 198859
118 26 BTB-00926636 17857480 Milk 17810363 17810367 203868
119 26 BTB-00926636 17857480 Milk 17810363 17810367 203869
120 26 BTB-00926636 17857480 Milk 17810363 17810367 210297
121 26 BTB-00926636 17857480 Milk 17811021 17811025 198858
122 26 BTB-00926636 17857480 Milk 17811021 17811025 203866
123 26 BTB-00926636 17857480 Milk 17811021 17811025 203867
124 26 BTB-00926636 17857480 Milk 17811021 17811025 210296
125 26 BTB-00926636 17857480 Milk 17811725 17811729 197493
126 26 BTB-00926636 17857480 Milk 17811725 17811729 202704
127 26 BTB-00926636 17857480 Milk 17812273 17812277 195685
128 26 BTB-00926636 17857480 Milk 17812273 17812277 200785
129 26 BTB-00926636 17857480 Milk 17813212 17813216 195285
130 26 BTB-00926636 17857480 Milk 17813212 17813216 200301
131 26 BTB-00926636 17857480 Milk 17813978 17813982 196236
132 26 BTB-00926636 17857480 Milk 17813978 17813982 201468
133 26 BTB-00926636 17857480 Milk 17815237 17815241 198002
134 26 BTB-00926636 17857480 Milk 17815237 17815241 203190
135 26 BTB-00926636 17857480 Milk 17817223 17817227 34750
136 26 BTB-00926636 17857480 Milk 17817223 17817227 195586
137 26 BTB-00926636 17857480 Milk 17821297 17821301 197235
138 26 BTB-00926636 17857480 Milk 17821297 17821301 202455
139 26 BTB-00926636 17857480 Milk 17822209 17822213 197783
140 26 BTB-00926636 17857480 Milk 17822209 17822213 202983
141 26 BTB-00926636 17857480 Milk 17823105 17823109 196204
142 26 BTB-00926636 17857480 Milk 17823105 17823109 201419
143 26 BTB-00926636 17857480 Milk 17824770 17824774 196867
144 26 BTB-00926636 17857480 Milk 17824770 17824774 202101
145 26 BTB-00926636 17857480 Milk 17833551 17833555 195748
146 26 BTB-00926636 17857480 Milk 17833551 17833555 200855
147 26 BTB-00926636 17857480 Milk 17833551 17833555 206213
148 26 BTB-00926636 17857480 Milk 17865069 17865073 197144
149 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 197144
150 26 BTB-00926636 17857480 Milk 17865069 17865073 202367
151 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 202367
152 26 BTB-00926636 17857480 Milk 17865069 17865073 208485
153 26 BTA-62125-no-rs 17899619 Milk 17865069 17865073 208485
154 26 BTB-00926636 17857480 Milk 17871201 17871205 197611
155 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 197611
156 26 BTB-00926636 17857480 Milk 17871201 17871205 202816
157 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 202816
158 26 BTB-00926636 17857480 Milk 17871201 17871205 209076
159 26 BTA-62125-no-rs 17899619 Milk 17871201 17871205 209076
160 26 BTB-00926636 17857480 Milk 17885871 17885875 63656
161 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 63656
162 26 BTB-00926636 17857480 Milk 17885871 17885875 199068
163 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 199068
164 26 BTB-00926636 17857480 Milk 17885871 17885875 204113
165 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204113
166 26 BTB-00926636 17857480 Milk 17885871 17885875 204114
167 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 204114
168 26 BTB-00926636 17857480 Milk 17885871 17885875 210399
169 26 BTA-62125-no-rs 17899619 Milk 17885871 17885875 210399
170 26 BTB-00926636 17857480 Milk 17895273 17895277 63657
171 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 63657
172 26 BTB-00926636 17857480 Milk 17895273 17895277 199069
173 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 199069
174 26 BTB-00926636 17857480 Milk 17895273 17895277 204115
175 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204115
176 26 BTB-00926636 17857480 Milk 17895273 17895277 204116
177 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 204116
178 26 BTB-00926636 17857480 Milk 17895273 17895277 210400
179 26 BTA-62125-no-rs 17899619 Milk 17895273 17895277 210400
180 26 BTB-00926636 17857480 Milk 17895803 17895807 199070
181 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 199070
182 26 BTB-00926636 17857480 Milk 17895803 17895807 204117
183 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204117
184 26 BTB-00926636 17857480 Milk 17895803 17895807 204118
185 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 204118
186 26 BTB-00926636 17857480 Milk 17895803 17895807 210401
187 26 BTA-62125-no-rs 17899619 Milk 17895803 17895807 210401
188 26 BTB-00926636 17857480 Milk 17900304 17900308 199071
189 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 199071
190 26 BTB-00926636 17857480 Milk 17900304 17900308 204119
191 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204119
192 26 BTB-00926636 17857480 Milk 17900304 17900308 204120
193 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 204120
194 26 BTB-00926636 17857480 Milk 17900304 17900308 210402
195 26 BTA-62125-no-rs 17899619 Milk 17900304 17900308 210402
196 26 BTB-00926636 17857480 Milk 17902932 17902936 63658
197 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 63658
198 26 BTB-00926636 17857480 Milk 17902932 17902936 199072
199 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 199072
200 26 BTB-00926636 17857480 Milk 17902932 17902936 204121
201 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204121
202 26 BTB-00926636 17857480 Milk 17902932 17902936 204122
203 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 204122
204 26 BTB-00926636 17857480 Milk 17902932 17902936 210403
205 26 BTA-62125-no-rs 17899619 Milk 17902932 17902936 210403
206 26 BTB-00926636 17857480 Milk 17906861 17906865 63660
207 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 63660
208 26 BTB-00926636 17857480 Milk 17906861 17906865 199074
209 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 199074
210 26 BTB-00926636 17857480 Milk 17906861 17906865 204125
211 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204125
212 26 BTB-00926636 17857480 Milk 17906861 17906865 204126
213 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 204126
214 26 BTB-00926636 17857480 Milk 17906861 17906865 210405
215 26 BTA-62125-no-rs 17899619 Milk 17906861 17906865 210405
216 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 63654
217 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 199076
218 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204129
219 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 204130
220 26 BTA-62125-no-rs 17899619 Milk 17914357 17914361 210407
221 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 63655
222 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 197826
223 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203026
224 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 203027
225 26 BTA-62125-no-rs 17899619 Milk 17915872 17915876 209354
226 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 195860
227 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201008
228 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 201009
229 26 BTA-62125-no-rs 17899619 Milk 17922107 17922111 206462
230 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 33984
231 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195668
232 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195669
233 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 195670
234 26 BTA-62125-no-rs 17899619 Milk 17928246 17928250 200758
235 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 199092
236 26 BTA-62125-no-rs 17899619 Milk 17931209 17931213 204151
237 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107048
238 28 BTA-99379-no-rs 41666186 Reproduction 41646434 41646438 107227
239 28 ARS-BFGL-NGS-11935 44942879 Reproduction 44971508 44971512 238444
240 28 ARS-BFGL-NGS-11935 44942879 Production 44981682 44981686 66703
241 28 ARS-BFGL-NGS-11935 44942879 Production 44981682 44981686 66704
242 28 ARS-BFGL-NGS-11935 44942879 Production 44981682 44981686 69429
243 28 Hapmap43005-BTA-64606 45805105 Milk 45811156 45811160 34467

18.1 QTL annotation on GALLO

QTL type

#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)

My Image

QTL type

#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

Reproduction My Image

Health My Image

18.2 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_name_15.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_type_15.csv")

#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
15 Milk C14 index 35 4437 209 163224 0.0000000 0.0000000 Milk
28 Milk myristoleic acid content 26 3047 209 163224 0.0000000 0.0000000 Milk
31 Milk protein percentage 39 8803 209 163224 0.0000000 0.0000000 Milk
29 Milk palmitoleic acid content 15 2422 209 163224 0.0000007 0.0000079 Milk
16 Milk C16 index 12 2002 209 163224 0.0000129 0.0001159 Milk
36 Muscle carnosine content 3 67 209 163224 0.0000933 0.0006998 Meat and Carcass
23 Milk lactose content 3 105 209 163224 0.0003523 0.0022648 Milk
20 Milk fat content 2 92 209 163224 0.0063319 0.0356168 Milk
27 Milk myristic acid content 4 902 209 163224 0.0294859 0.1449683 Milk
33 Milk stearic acid content 2 218 209 163224 0.0322152 0.1449683 Milk
30 Milk pentadecylic acid content 2 280 209 163224 0.0505491 0.2067917 Milk
44 Teat length 2 300 209 163224 0.0570974 0.2141153 Exterior
14 Milk arachidic acid content 1 60 209 163224 0.0740084 0.2220251 Milk
25 Milk margaric acid content 1 59 209 163224 0.0728207 0.2220251 Milk
26 Milk mid-infrared spectra 1 55 209 163224 0.0680550 0.2220251 Milk
8 Ketosis 2 441 209 163224 0.1101333 0.2915293 Health
40 Rear leg placement - side view 2 430 209 163224 0.1056354 0.2915293 Exterior
39 Pregnancy rate 3 944 209 163224 0.1218012 0.3045029 Reproduction
37 Myristic acid content 1 120 209 163224 0.1425637 0.3376510 Meat and Carcass
6 Fat thickness at the 12th rib 1 169 209 163224 0.1947851 0.4382665 Meat and Carcass
7 Foot angle 2 672 209 163224 0.2129475 0.4563160 Exterior
13 Maturity rate 1 243 209 163224 0.2677107 0.5072331 Production
17 Milk C18 index 1 246 209 163224 0.2705243 0.5072331 Milk
42 Stillbirth 3 1363 209 163224 0.2544359 0.5072331 Reproduction
18 Milk capric acid content 2 912 209 163224 0.3259016 0.5640605 Milk
38 Net merit 2 903 209 163224 0.3216964 0.5640605 Production
24 Milk lauric acid content 1 366 209 163224 0.3746664 0.6021425 Milk
32 Milk protein yield 5 3093 209 163224 0.3633449 0.6021425 Milk
41 Somatic cell score 2 1122 209 163224 0.4213659 0.6538437 Health
5 Dairy form 1 514 209 163224 0.4829472 0.7244208 Exterior
3 Calving ease 5 3819 209 163224 0.5419946 0.7725995 Reproduction
9 Lean meat yield 1 621 209 163224 0.5494041 0.7725995 Meat and Carcass
43 Strength 1 664 209 163224 0.5736508 0.7813075 Exterior
45 Udder depth 1 695 209 163224 0.5903212 0.7813075 Exterior
19 Milk caprylic acid content 1 811 209 163224 0.6471450 0.8320436 Milk
10 Length of productive life 2 2004 209 163224 0.7280866 0.8855108 Production
35 Milking speed 1 1011 209 163224 0.7273020 0.8855108 Milk
2 Body weight gain 1 1354 209 163224 0.8248432 0.9440581 Production
4 Conception rate 1 1255 209 163224 0.8009513 0.9440581 Reproduction
11 Longissimus muscle area 1 1420 209 163224 0.8391628 0.9440581 Meat and Carcass
12 Marbling score 1 1817 209 163224 0.9037804 0.9830306 Meat and Carcass
34 Milk yield 5 6432 209 163224 0.9174952 0.9830306 Milk
1 Body weight 2 4289 209 163224 0.9746418 0.9967928 Production
21 Milk fat percentage 8 10941 209 163224 0.9726277 0.9967928 Milk
22 Milk fat yield 1 8220 209 163224 0.9999797 0.9999797 Milk

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 9 9077 209 163224 0.8264587 1
2 Health 4 5889 209 163224 0.9456284 1
3 Meat and Carcass 8 18258 209 163224 0.9999640 1
4 Milk 168 75352 209 163224 0.0000000 0
5 Production 8 19640 209 163224 0.9999916 1
6 Reproduction 12 35008 209 163224 1.0000000 1

My Image

19 GPROFILER

online version: https://biit.cs.ut.ee/gprofiler/gost

I got a script from Julia Rodrigues about the R package GPROFILER to perform an enrichment of my Genes.

### enriquecimento genico
#install.packages("gprofiler2")
library(gprofiler2)

#Para conferir a lista de organism -> https://biit.cs.ut.ee/gprofiler/page/organism-list

#Obs: eu entro com os ids ENSOAR...
query <- read.table ("/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.txt", header = T)
query <- query[,c("gene_id")]

gene_enrich <- gost(
  query,
  organism = "btaurus", 
  ordered_query = FALSE,
  multi_query = FALSE,
  significant = TRUE,
  exclude_iea = FALSE,
  measure_underrepresentation = FALSE,
  evcodes = TRUE,
  user_threshold = 0.05,
  correction_method = c("fdr"),
  domain_scope = c("annotated"),
  numeric_ns = "",
  sources = NULL,
  as_short_link = FALSE,
  highlight = FALSE
)


#str(gene_enrich) # para ver o formato dos meus dados

#selecionando apenas as informações da lista que me interessam para fazer meu data.frame 
result_enrich <- data.frame(gene_enrich$result)
result_enrich <- data.frame(Category = result_enrich$source,
                            ID = result_enrich$term_id,
                            Term = result_enrich$term_name,
                            adj_pvalue = result_enrich$p_value,
                            id_ensembl = result_enrich$intersection)

write.table(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/Bonf_5_15/gene_enrich_15.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=F)
write.csv(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/Bonf_5_15/gene_enrich_15.csv")

Below we can se the significant terms of this enrichment (output):

X Category ID Term adj_pvalue id_ensembl
1 GO:MF GO:0004352 glutamate dehydrogenase (NAD+) activity 0.0290548 ENSBTAG00000007540
2 GO:MF GO:0004353 glutamate dehydrogenase [NAD(P)+] activity 0.0290548 ENSBTAG00000007540
3 GO:MF GO:0016639 oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor 0.0290548 ENSBTAG00000007540
4 GO:MF GO:0031702 type 1 angiotensin receptor binding 0.0290548 ENSBTAG00000012393
5 GO:MF GO:0031703 type 2 angiotensin receptor binding 0.0290548 ENSBTAG00000012393
6 GO:MF GO:0005026 transforming growth factor beta receptor activity, type II 0.0414893 ENSBTAG00000019832
7 GO:MF GO:0102193 protein-ribulosamine 3-kinase activity 0.0414893 ENSBTAG00000015414

19.1 GPROFILER ON-LINE

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

The GO CONTEXT provided by GPROFILER can be seen in the image below:

My Image

19.2 Terms distribution Pie Chart

There was no necessity to make this plot.

20 Haplotype Blocks

First I need to generate the .ped file

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file

plink \
    --bfile ${DATA} \
    --cow \
    --recode \
    --out ${RESULT}

Second, is necessary to convert the .ped file to Linkage format:

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in

plink \
    --file ${DATA} \
    --cow \
    --recode HV \
    --out ${RESULT}

The code above will generate a pair of files .ped and .info for each chromosome

I ran the haploview only for chromosome 03, 11, 14, 19, 20, 22, 23, 26 and 28 that are when the significant SNP are located.

As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot

My Image

VEP rs SNP Chr bp A1 A2 Freq Haploview.Order Block
yes rs109888380 ARS-BFGL-NGS-4585 3 107160292 A C 0.3353170 1818
yes rs41624254 BTA-122098-no-rs 11 31882807 A C 0.4444440 674
no rs42217767 BTB-01060598 11 32301594 C A 0.4503970 680
yes rs41632193 Hapmap39823-BTA-35254 14 64061208 A C 0.2916670 1196
yes rs41632223 UA-IFASA-7664 14 64299714 C A 0.2678570 1200
yes rs109751680 ARS-BFGL-NGS-28901 19 42338148 C A 0.3710320 713
yes rs41577586 ARS-BFGL-NGS-29119 19 49744592 A C 0.4047620 849 Block 38
yes rs42590312 BTB-01471673 20 11545685 A C 0.2460320 232
yes rs110031217 ARS-BFGL-NGS-23411 22 5134078 A C 0.1865080 88
yes rs110991998 ARS-BFGL-NGS-26419 22 46052082 A C 0.0813492 762
no rs29022819 Hapmap54016-rs29022819 23 33609998 C A 0.1904760 534
yes rs41644634 BTA-62125-no-rs 26 17899619 C A 0.0257937 297 Block 10
yes rs42089058 BTB-00926636 26 17857480 C A 0.0257937 296 Block 10
yes rs109578343 ARS-BFGL-NGS-11935 28 44942879 A C 0.0297619 773
yes rs41567074 BTA-99379-no-rs 28 41666186 A C 0.0496032 701
yes rs41648479 Hapmap43005-BTA-64606 28 45805105 A C 0.0297619 791 Block 21

As can be observed in the column “Block”,four significant SNPs fell within a Haploblock.

rs41577586 (ARS-BFGL-NGS-29119) - chr 19 - order 849 - Block 38 rs41644634 (BTA-62125-no-rs) - chr 26 - order 297 - Block 10 rs42089058 (BTB-00926636) - chr 26 - order 296 - Block 10 rs41648479 (Hapmap43005-BTA-64606) - chr 28 - order 791 - Block 21

Chr 3 (SNP 1818) My Image

Chr 11 (SNP 674 and 680) My Image

Chr 14 (SNP 1196 and 1200) My Image

Chr 19 (SNP 713) My Image

Chr 19 (SNP 849) My Image

Chr 20 (SNP 232) My Image

Chr 22 (SNP 88) My Image

Chr 22 (SNP 762) My Image

Chr 23 (SNP 534) My Image

Chr 26 (SNP 296 and 297) My Image

Chr 28 (SNP 701) My Image

Chr 28 (SNP 773 and 791) My Image

21 Genetic Correlation

To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.

Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.

Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.

The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]

Where:

This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.

21.1 Data preparation

The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files

rm(list = ls())

# Load the necessary library
library(dplyr)
library(tidyverse)

cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")


sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]

sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]

print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)

# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)

# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)

# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))

if (have_same_columns) {
  print("The dataframes have the same columns.")
} else {
  print("The dataframes do not have the same columns.")
}


# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))

if (same_order) {
  print("The columns are in the same order.")
} else {
  print("The columns are not in the same order.")
}

GEBVs_combined <- rbind(GEBVs1, GEBVs2)

# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)

# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)

# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME

# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
  select(elora_id, DHI_BARN_NAME, everything())

write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")

# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")

write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))

# The data below has the the 55 GEBVs + Cortisol data + Birth Year
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.csv")

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date$Sampling_date <- as.Date(samp_date$Sampling_date, format = "%m/%d/%Y")

table(samp_date$Sampling_date)

samp_date <- select(samp_date, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date, everything())

# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")

ps. I double checked by hand the select and merge process against the original tables received and is everything ok.

21.2 Correlations - Linear Regression

21.2.1 Test with one trait

# Fit the linear regression model
model <- lm(T4Cortisol ~ MILK, data = data)

# Summarize the model to get the regression coefficients and statistical summary
model_summary <- summary(model)

# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1]
p_value <- pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)

# Combine the statistics into a data frame
results <- data.frame(
  Multiple_R_Squared = multiple_r_squared,
  Adjusted_R_Squared = adjusted_r_squared,
  F_Statistic = f_statistic,
  P_Value = p_value
)

# Save the results to a CSV file
write.csv(results, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_milk.csv", row.names = FALSE)

# Plot the linear regression between T4Cortisol and MILK
ggplot(data, aes(x = MILK, y = T4Cortisol)) +
  geom_point() +  # Add points
  geom_smooth(method = "lm", se = FALSE, color = "blue") +  # Add linear regression line
  labs(title = "Linear Regression of T4Cortisol on MILK",
       x = "MILK",
       y = "T4Cortisol") +
  theme_minimal()

Plot linear regression Cortisol vs. Milk GEBV My Image

Summary statistics

Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value
5.2e-05 -0.0038237 0.0134257 0.9078463

21.2.2 Loop for all traits

As we have 54 GEBVs to fit a linear regression we designed a loop:

# Initialize a list to store the results
results_list <- list()

# Loop through columns 4 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
  trait_name <- colnames(data)[i]
  
  # Fit the linear regression model
  model <- lm(data[[2]] ~ data[[i]], data = data)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 3]] <- result
}

# Combine all results into a single data frame
results_df_0 <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
BMR 0.0400473 0.0363266 10.7632421 0.0011781 21.1128882 0.0011781
CO 0.0280949 0.0243278 7.4580136 0.0067510 -13.2769834 0.0067510
MSL 0.0198445 0.0160455 5.2235464 0.0230946 -16.9454262 0.0230946
CTFS 0.0185227 0.0147186 4.8690531 0.0282237 15.8469990 0.0282237
UT 0.0166425 0.0128310 4.3664218 0.0376334 -15.6741466 0.0376334
MS 0.0128559 0.0090298 3.3600230 0.0679494 -14.7558389 0.0679494
CK 0.0106156 0.0067808 2.7682148 0.0973674 14.6866464 0.0973674
IH 0.0101222 0.0062855 2.6382359 0.1055404 -7.3062481 0.1055404
DD 0.0100161 0.0061790 2.6103042 0.1073935 -9.5352332 0.1073935
CONF 0.0095452 0.0057062 2.4863924 0.1160600 -13.9681677 0.1160600
UD 0.0085295 0.0046866 2.2195470 0.1374945 -15.7518165 0.1374945
SU 0.0077590 0.0039131 2.0174678 0.1567056 7.3716271 0.1567056
MT 0.0076274 0.0037810 1.9829901 0.1602796 -10.6502458 0.1602796
LP 0.0074242 0.0035770 1.9297608 0.1659824 9.2112199 0.1659824
MET 0.0070642 0.0032156 1.8355367 0.1766601 -7.6934858 0.1766601
MR 0.0066755 0.0028254 1.7338637 0.1890867 -7.8752018 0.1890867
DO 0.0060791 0.0022267 1.5779921 0.2101864 9.5682702 0.2101864
DHL 0.0052895 0.0014340 1.3719419 0.2425591 -9.4618477 0.2425591
MOB 0.0047368 0.0008792 1.2279220 0.2688435 7.8139790 0.2688435
DF 0.0046402 0.0007822 1.2027541 0.2737945 7.8603320 0.2737945
FAT 0.0046204 0.0007623 1.1975980 0.2748229 1.0090961 0.2748229
SH 0.0043235 0.0004643 1.1203076 0.2908422 5.2472606 0.2908422
DS 0.0043121 0.0004528 1.1173348 0.2914818 -7.5550660 0.2914818
FOOT 0.0043005 0.0004412 1.1143288 0.2921304 26.7441735 0.2921304
FA 0.0042762 0.0004168 1.1079850 0.2935052 -8.0817790 0.2935052
PROT 0.0040026 0.0001421 1.0368190 0.3095163 1.2915168 0.3095163
WL 0.0038262 -0.0000350 0.9909414 0.3204451 5.2197508 0.3204451
MDR 0.0038096 -0.0000516 0.9866366 0.3214965 7.0545996 0.3214965
CA 0.0036379 -0.0002239 0.9420177 0.3326685 7.4603427 0.3326685
ME 0.0034176 -0.0004452 0.8847561 0.3477821 -5.0950658 0.3477821
SCS 0.0031318 -0.0007321 0.8105371 0.3688011 -4.8756313 0.3688011
FE 0.0028874 -0.0009774 0.7470985 0.3881993 -5.1774356 0.3881993
FTP 0.0027874 -0.0010778 0.7211475 0.3965550 9.8351267 0.3965550
AFS 0.0025212 -0.0013450 0.6521188 0.4201001 -5.9185799 0.4201001
UF 0.0018686 -0.0020001 0.4830008 0.4876916 6.8229142 0.4876916
ST 0.0018050 -0.0020640 0.4665276 0.4952019 -7.1228081 0.4952019
MSP 0.0014890 -0.0023812 0.3847251 0.5356327 -3.5290172 0.5356327
HL 0.0013205 -0.0025503 0.3411423 0.5596809 -4.3615962 0.5596809
SCK 0.0011411 -0.0027305 0.2947310 0.5876733 3.3934702 0.5876733
TL 0.0009008 -0.0029717 0.2326179 0.6299983 -5.0172076 0.6299983
LOC 0.0008542 -0.0030185 0.2205721 0.6390011 -3.5628830 0.6390011
DA 0.0006826 -0.0031907 0.1762328 0.6749803 3.5280623 0.6749803
CW 0.0005859 -0.0032878 0.1512479 0.6976664 -2.7721164 0.6976664
BCS 0.0003594 -0.0035151 0.0927672 0.7609338 2.1412729 0.7609338
RUM 0.0003149 -0.0035598 0.0812707 0.7758114 -1.6952340 0.7758114
TU 0.0002204 -0.0036547 0.0568793 0.8116875 1.0400512 0.8116875
FL 0.0001793 -0.0036960 0.0462618 0.8298705 1.5296709 0.8298705
BQ 0.0001697 -0.0037056 0.0437957 0.8343993 -1.5434870 0.8343993
HHE 0.0000521 -0.0038236 0.0134478 0.9077707 0.6475671 0.9077707
MILK 0.0000520 -0.0038237 0.0134257 0.9078463 0.0042447 0.9078463
HH 0.0000509 -0.0038249 0.0131340 0.9088482 -0.6606061 0.9088482
BD 0.0000293 -0.0038466 0.0075535 0.9308097 -0.6433874 0.9308097
DCA 0.0000046 -0.0038713 0.0011932 0.9724707 -0.2685932 0.9724707
FEED 0.0000001 -0.0038759 0.0000152 0.9968903 -0.0190613 0.9968903
Five out 54 traits GEBVs presented significant correlations with Cortisol:
  • BMR = Body Maintenance Requirements
  • CO = Cystic ovaries
  • MSL = Median Suspensory Ligament
  • CTFS = Calving to First Service
  • UT = Udder Texture

21.2.3 Adding BIRTH_YEAR

21.2.3.1 Model Description

The regression model added the BY is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The BIRTH_YEAR variable is converted to a factor to account for the categorical nature of birth years.

# Convert BIRTH_YEAR to a factor and rename
data$BIRTH_YEAR <- as.factor(data$BIRTH_YEAR)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
  trait_name <- colnames(data)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data[[2]] ~ data[[i]] + BIRTH_YEAR, data = data)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
BMR 0.0881915 0.0738886 6.165997 0.0000945 19.1527245 0.0028131
CO 0.0803421 0.0659161 5.569252 0.0002590 -12.5790675 0.0094097
MSL 0.0734487 0.0589145 5.053527 0.0006186 -16.2613977 0.0277614
IH 0.0719150 0.0573568 4.939831 0.0007494 -9.3449051 0.0354801
CTFS 0.0705440 0.0559642 4.838504 0.0008891 14.3231800 0.0442704
UT 0.0704128 0.0558310 4.828827 0.0009037 -14.8606501 0.0452229
MS 0.0686230 0.0540132 4.697043 0.0011284 -14.8462730 0.0606040
CK 0.0670600 0.0524256 4.582365 0.0013689 15.4450943 0.0785803
PROT 0.0658116 0.0511577 4.491053 0.0015963 2.1108214 0.0970292
DD 0.0638377 0.0491528 4.347166 0.0020332 -8.6326827 0.1365405
FAT 0.0628176 0.0481167 4.273045 0.0023028 1.2796392 0.1637385
SU 0.0623970 0.0476895 4.242529 0.0024239 7.0165690 0.1767005
CONF 0.0621537 0.0474424 4.224892 0.0024968 -11.6231634 0.1847326
MET 0.0618359 0.0471196 4.201861 0.0025952 -7.2376432 0.1958719
MR 0.0617517 0.0470341 4.195767 0.0026218 -7.5588944 0.1989504
FOOT 0.0614269 0.0467042 4.172251 0.0027273 31.1509094 0.2113774
MOB 0.0613973 0.0466741 4.170111 0.0027372 8.6217376 0.2125534
LP 0.0613481 0.0466241 4.166549 0.0027536 8.0864569 0.2145285
FTP 0.0608284 0.0460963 4.128971 0.0029327 13.5010333 0.2367661
DO 0.0607904 0.0460577 4.126220 0.0029463 8.8960747 0.2385010
ME 0.0607825 0.0460496 4.125648 0.0029491 -6.2944604 0.2388636
UD 0.0606216 0.0458862 4.114024 0.0030071 -12.1091142 0.2463830
FA 0.0605549 0.0458185 4.109209 0.0030315 -8.6648464 0.2495819
MT 0.0601784 0.0454361 4.082026 0.0031728 -8.2421107 0.2686411
CA 0.0599002 0.0451535 4.061948 0.0032814 8.3011337 0.2838987
DF 0.0598369 0.0450892 4.057381 0.0033066 7.4854663 0.2875213
DHL 0.0595211 0.0447684 4.034611 0.0034352 -8.1319072 0.3064961
SH 0.0586677 0.0439017 3.973163 0.0038074 4.4345159 0.3666760
MILK 0.0585384 0.0437704 3.963861 0.0038671 0.0328321 0.3771595
MDR 0.0584166 0.0436466 3.955101 0.0039243 6.0467308 0.3874240
ST 0.0578881 0.0431099 3.917120 0.0041817 -7.9781463 0.4369789
SCS 0.0576964 0.0429152 3.903356 0.0042790 -3.9614430 0.4573264
AFS 0.0575918 0.0428089 3.895845 0.0043331 -5.2064660 0.4690656
MSP 0.0575087 0.0427245 3.889880 0.0043765 -3.9533312 0.4787399
DS 0.0573283 0.0425413 3.876936 0.0044723 -4.7841746 0.5009026
WL 0.0572605 0.0424724 3.872073 0.0045088 3.4479700 0.5096796
TL 0.0572547 0.0424665 3.871655 0.0045119 -6.7563254 0.5104479
FE 0.0572054 0.0424165 3.868123 0.0045386 -3.8209975 0.5170084
DA 0.0566761 0.0418789 3.830184 0.0048356 4.3714958 0.5986609
HHE 0.0565353 0.0417358 3.820095 0.0049178 2.6941677 0.6249126
HL 0.0563327 0.0415301 3.805592 0.0050384 -3.1669335 0.6676242
LOC 0.0562600 0.0414563 3.800388 0.0050823 -3.0195770 0.6847865
FL 0.0562527 0.0414489 3.799865 0.0050868 2.8115963 0.6865743
SCK 0.0560192 0.0412117 3.783153 0.0052307 1.9379506 0.7520160
CW 0.0559471 0.0411384 3.777995 0.0052759 -1.9950607 0.7767448
FEED 0.0559443 0.0411356 3.777797 0.0052777 -1.3559978 0.7777588
BQ 0.0559195 0.0411104 3.776021 0.0052933 -1.9570851 0.7870649
TU 0.0558834 0.0410737 3.773438 0.0053162 1.0731908 0.8014578
UF 0.0558449 0.0410347 3.770689 0.0053406 2.2354069 0.8181403
DCA 0.0557115 0.0408991 3.761148 0.0054263 -1.0069789 0.8965483
BD 0.0557011 0.0408885 3.760401 0.0054331 0.8702554 0.9055126
BCS 0.0556786 0.0408657 3.758795 0.0054477 0.6193741 0.9285710
RUM 0.0556595 0.0408464 3.757433 0.0054601 -0.3154634 0.9570449
HH 0.0556561 0.0408428 3.757185 0.0054623 -0.2539972 0.9646162
Fitting Birth_Year to the model made one more trait significativally correlated with cortisol and enhanced the proportio of variation explained by the model:
  • BMR = Body Maintenance Requirements
  • CO = Cystic ovaries
  • MSL = Median Suspensory Ligament
  • CTFS = Calving to First Service
  • UT = Udder Texture
  • IH = Interdigital Hyperplasia

Below we can check the improvement in the percentage of variation in Cortisol explained by the Trait_GEBVs + Birth_Year.

CORR_BY <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv")

CORR_MIN <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv")

CORR_BY <- CORR_BY[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]

CORR_MIN <- CORR_MIN[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]

colnames(CORR_BY)[colnames(CORR_BY) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_BY"

colnames(CORR_MIN)[colnames(CORR_MIN) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_MIN"

Corr_R2_comp <- merge(CORR_MIN, CORR_BY, by.x = "Trait", by.y = "Trait")

Corr_R2_comp$Absolute_Change <- Corr_R2_comp$Adjusted_R_Squared_BY - Corr_R2_comp$Adjusted_R_Squared_MIN

write.csv(Corr_R2_comp, "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/absolute_improv_r2.csv")
X Trait Adjusted_R_Squared_Model_MIN Adjusted_R_Squared_MIN.BY Absolute_Change
1 AFS -0.0013450 0.0428089 0.0441539
2 BCS -0.0035151 0.0408657 0.0443809
3 BD -0.0038466 0.0408885 0.0447351
4 BMR 0.0363266 0.0738886 0.0375621
5 BQ -0.0037056 0.0411104 0.0448160
6 CA -0.0002239 0.0451535 0.0453774
7 CK 0.0067808 0.0524256 0.0456448
8 CO 0.0243278 0.0659161 0.0415882
9 CONF 0.0057062 0.0474424 0.0417362
10 CTFS 0.0147186 0.0559642 0.0412457
11 CW -0.0032878 0.0411384 0.0444262
12 DA -0.0031907 0.0418789 0.0450696
13 DCA -0.0038713 0.0408991 0.0447705
14 DD 0.0061790 0.0491528 0.0429738
15 DF 0.0007822 0.0450892 0.0443070
16 DHL 0.0014340 0.0447684 0.0433344
17 DO 0.0022267 0.0460577 0.0438310
18 DS 0.0004528 0.0425413 0.0420884
19 FA 0.0004168 0.0458185 0.0454018
20 FAT 0.0007623 0.0481167 0.0473544
21 FE -0.0009774 0.0424165 0.0433939
22 FEED -0.0038759 0.0411356 0.0450115
23 FL -0.0036960 0.0414489 0.0451449
24 FOOT 0.0004412 0.0467042 0.0462629
25 FTP -0.0010778 0.0460963 0.0471741
26 HH -0.0038249 0.0408428 0.0446677
27 HHE -0.0038236 0.0417358 0.0455595
28 HL -0.0025503 0.0415301 0.0440805
29 IH 0.0062855 0.0573568 0.0510713
30 LOC -0.0030185 0.0414563 0.0444747
31 LP 0.0035770 0.0466241 0.0430471
32 MDR -0.0000516 0.0436466 0.0436982
33 ME -0.0004452 0.0460496 0.0464948
34 MET 0.0032156 0.0471196 0.0439039
35 MILK -0.0038237 0.0437704 0.0475941
36 MOB 0.0008792 0.0466741 0.0457949
37 MR 0.0028254 0.0470341 0.0442086
38 MS 0.0090298 0.0540132 0.0449834
39 MSL 0.0160455 0.0589145 0.0428690
40 MSP -0.0023812 0.0427245 0.0451057
41 MT 0.0037810 0.0454361 0.0416552
42 PROT 0.0001421 0.0511577 0.0510155
43 RUM -0.0035598 0.0408464 0.0444062
44 SCK -0.0027305 0.0412117 0.0439421
45 SCS -0.0007321 0.0429152 0.0436472
46 SH 0.0004643 0.0439017 0.0434374
47 ST -0.0020640 0.0431099 0.0451738
48 SU 0.0039131 0.0476895 0.0437764
49 TL -0.0029717 0.0424665 0.0454382
50 TU -0.0036547 0.0410737 0.0447284
51 UD 0.0046866 0.0458862 0.0411996
52 UF -0.0020001 0.0410347 0.0430348
53 UT 0.0128310 0.0558310 0.0430000
54 WL -0.0000350 0.0424724 0.0425074

Adjusted_R_Squared_Model_MIN = Model without Birth Year
Adjusted_R_Squared_MIN.BY = Model adding Birth Year

21.2.4 Adding BIRTH_YEAR and SAMPLING DATE

The regression model added the BY and SAMPLING DATE is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + SAMPLING\_DATE + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(SAMPLING\_DATE\) is the cortisol sampling date for the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The SAMPLING_DATE variable is also converted to a factor to account for the categorical nature of sampling date.

# Convert BIRTH_YEAR to a factor and rename
data_final$BIRTH_YEAR <- as.factor(data_final$BIRTH_YEAR)

# Convert Sampling_data to a factor and rename
data_final$Sampling_date <- as.factor(data_final$Sampling_date)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 5:ncol(data_final)) {
  trait_name <- colnames(data_final)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data_final[[2]] ~ data_final[[i]] + data_final$BIRTH_YEAR + data_final$Sampling_date , data = data_final)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR and SAMPLING_DATE

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
CO 0.3913617 0.3257757 5.967151 0 -11.1849879 0.0099861
BMR 0.3899307 0.3241904 5.931386 0 14.4612444 0.0135665
LP 0.3858330 0.3196512 5.829896 0 12.2619950 0.0330355
MILK 0.3850052 0.3187343 5.809559 0 0.0671160 0.0396652
PROT 0.3841834 0.3178238 5.789422 0 2.2166395 0.0476301
UT 0.3822859 0.3157218 5.743130 0 -12.1238483 0.0731558
CK 0.3801906 0.3134008 5.692345 0 12.7333709 0.1192726
HHE 0.3795579 0.3126999 5.677076 0 7.4003505 0.1388525
MSP 0.3793006 0.3124149 5.670876 0 -7.2388952 0.1478152
DA 0.3791391 0.3122360 5.666989 0 10.8037510 0.1537699
TU 0.3785722 0.3116080 5.653351 0 -5.2064639 0.1769405
MSL 0.3777098 0.3106526 5.632656 0 -8.6230634 0.2203509
BQ 0.3775224 0.3104450 5.628166 0 -7.7284891 0.2313766
IH 0.3772456 0.3101385 5.621542 0 -4.7354916 0.2488963
ST 0.3767980 0.3096426 5.610839 0 -9.9721644 0.2808121
LOC 0.3766890 0.3095218 5.608233 0 -7.2367609 0.2893509
FAT 0.3765584 0.3093772 5.605116 0 0.8520581 0.3000062
UD 0.3763515 0.3091480 5.600177 0 -9.3297651 0.3179533
CA 0.3757417 0.3084725 5.585641 0 6.0756898 0.3798799
MS 0.3755038 0.3082090 5.579979 0 -6.0375749 0.4085912
SCK 0.3754363 0.3081342 5.578372 0 -4.4161473 0.4173165
SCS 0.3754033 0.3080976 5.577587 0 3.9080681 0.4216769
IDD 0.3752677 0.3079474 5.574362 0 3.7029601 0.4403519
FOOT 0.3751830 0.3078536 5.572349 0 17.3309360 0.4526548
TL 0.3750803 0.3077398 5.569908 0 -6.6585204 0.4683140
MET 0.3749055 0.3075462 5.565755 0 -3.4014191 0.4970656
CTFS 0.3747600 0.3073850 5.562300 0 4.0557716 0.5233387
CONF 0.3746137 0.3072229 5.558828 0 -4.7896643 0.5523352
FE 0.3745078 0.3071056 5.556316 0 -2.9375586 0.5752627
HL 0.3743988 0.3069849 5.553731 0 3.4274416 0.6009102
FA 0.3742870 0.3068610 5.551080 0 -3.2821641 0.6298652
DD 0.3742836 0.3068573 5.551001 0 2.5402141 0.6307730
MOB 0.3742556 0.3068263 5.550337 0 3.0592146 0.6385442
FTP 0.3742359 0.3068045 5.549870 0 4.8583743 0.6441419
BCS 0.3741681 0.3067293 5.548263 0 2.6692352 0.6643615
HH 0.3740753 0.3066266 5.546066 0 2.0230249 0.6947785
DF 0.3740392 0.3065865 5.545209 0 2.3681806 0.7076996
FL 0.3739824 0.3065237 5.543865 0 -2.1711035 0.7294613
UF 0.3739605 0.3064994 5.543347 0 2.8880683 0.7384413
MDR 0.3738855 0.3064163 5.541571 0 1.8495373 0.7722558
WL 0.3738548 0.3063822 5.540843 0 1.2643028 0.7878801
AFS 0.3738293 0.3063540 5.540239 0 1.5814380 0.8018670
RUM 0.3738023 0.3063241 5.539601 0 -1.2574561 0.8179113
SH 0.3738015 0.3063233 5.539583 0 1.0342843 0.8184040
BD 0.3736958 0.3062061 5.537080 0 0.7613199 0.9071017
MR 0.3736957 0.3062060 5.537078 0 0.6291752 0.9071898
SU 0.3736871 0.3061965 5.536876 0 0.4745538 0.9186634
FEED 0.3736818 0.3061907 5.536751 0 0.3944746 0.9266753
DHL 0.3736774 0.3061858 5.536647 0 -0.5919776 0.9340753
DO 0.3736757 0.3061838 5.536604 0 0.5220723 0.9373261
DS 0.3736695 0.3061769 5.536458 0 0.3979132 0.9502668
CW 0.3736676 0.3061749 5.536414 0 0.3562368 0.9547808
ME 0.3736658 0.3061729 5.536370 0 -0.2401976 0.9599012
MT 0.3736595 0.3061659 5.536223 0 -0.0976386 0.9882642
DCA 0.3736593 0.3061657 5.536217 0 0.0802555 0.9906432
Fitting Birth_Year and Sampling_date to the model these are the traits with significant correlation:
  • CO = Cystic ovaries
  • BMR = Body Maintenance Requirements
  • LP = Lactation persistency
  • MILK = Milk yield
  • PROT = Protein yield
  • UT = Udder Texture